import functools
import hashlib
import io
import json
import logging
import os
import shutil
import time
import zipfile

import httpx
from jsonschema.exceptions import ValidationError

from app.config import PCM_WEB_CACHE_FRESH_TIME, PCM_WEB_CACHE_LIFE_TIME, PCM_WEB_MAX_DOWNLOAD_SIZE, PCM_WEB_MAX_PACKAGES_SIZE, PCM_WEB_MAX_REPO_SIZE

from .lock import FSLock
from .schemas import PACKAGES_VALIDATOR, REPO_VALIDATOR

READ_SIZE = 64 * 1024  # 64kb

logger = logging.getLogger(__name__)


def getsha256(filename, read_size=None):
    """Return the hex SHA-256 digest of *filename*.

    The file is streamed in *read_size*-byte chunks (defaults to the
    module-level READ_SIZE) so large resources are never loaded into
    memory all at once.
    """
    if read_size is None:
        read_size = READ_SIZE
    digest = hashlib.sha256()
    with io.open(filename, "rb") as f:
        while chunk := f.read(read_size):
            digest.update(chunk)
    return digest.hexdigest()


def load_json_file(file_name: str) -> dict:
    """Parse *file_name* as UTF-8 JSON and return the decoded object.

    Decoding problems are surfaced as RuntimeError so callers can treat
    bad cache/download content uniformly.
    """
    with io.open(file_name, encoding="utf-8") as handle:
        try:
            parsed = json.load(handle)
        except UnicodeDecodeError:
            raise RuntimeError("Loaded resource is not valid utf-8") from None
        except json.JSONDecodeError:
            raise RuntimeError("Downloaded resource is not valid json") from None
        return parsed


async def download_file(url: str, path: str, max_size: int) -> bool:
    """Stream *url* into the file at *path*, enforcing *max_size* bytes.

    Returns True on success. Raises RuntimeError for HTTP status errors,
    network failures, or an oversized response. The size limit is checked
    both against the Content-Length header (fast reject) and the actual
    streamed byte count, since the header may be absent or wrong.
    """
    logger.info(f"Downloading {url} to {path}")

    async with httpx.AsyncClient() as client:
        try:
            async with client.stream('GET', url, follow_redirects=True) as response:
                response.raise_for_status()

                # Fast reject when the server declares an oversized body.
                total = response.headers.get('Content-length', None)
                if total:
                    if int(total) > max_size:
                        raise RuntimeError("Http response is too large")

                bytes_written = 0

                with io.open(path, "wb") as f:
                    async for chunk in response.aiter_bytes(10240):
                        f.write(chunk)
                        bytes_written += len(chunk)

                        # Header may be missing or understated; enforce
                        # the limit on the bytes actually received.
                        if bytes_written > max_size:
                            raise RuntimeError("Http response is too large")

            return True

        except httpx.HTTPStatusError as e:
            raise RuntimeError(
                f"Error downloading url {url}\n"
                f"HTTP code: {e.response.status_code}") from None
        except RuntimeError:
            # Our own size-limit errors above: propagate unchanged instead
            # of re-wrapping them in the generic message below.
            raise
        except Exception as e:
            raise RuntimeError(f"Error downloading url {url}\n{e}") from None


def get_repo_id(repourl: str):
    """Return a stable 16-character cache id derived from the repo URL."""
    digest = hashlib.sha256(repourl.encode("utf-8")).hexdigest()
    return digest[:16]


def on_shutil_error(func, path, exc_info):
    """shutil.rmtree onerror callback: log the failure and keep going."""
    failed_op = func.__name__
    logger.error("%s was unable to remove %s", failed_op, path,
                 exc_info=exc_info)


def cleanup():
    """Remove stale or incomplete repo cache directories under 'cache'.

    A directory is dropped when either metadata file is missing (an
    interrupted download) or when repository.json has not been accessed
    for longer than PCM_WEB_CACHE_LIFE_TIME.
    """
    logger.info("Running cache cleanup")

    for path, dirs, files in os.walk('cache', topdown=True):
        if path == 'cache':
            continue

        if 'packages.json' not in files or 'repository.json' not in files:
            shutil.rmtree(path, onerror=on_shutil_error)
            # Prune so os.walk does not try to descend into the tree
            # we just removed.
            dirs[:] = []
            continue

        # NOTE(review): relies on filesystem atime tracking last use;
        # may be stale on noatime mounts — confirm deployment mounts.
        cache_atime = os.path.getatime(os.path.join(path, 'repository.json'))
        current_time = int(time.time())

        logger.debug("Cache times for %s: %d %d",
                     path, cache_atime, current_time)

        if current_time - cache_atime > PCM_WEB_CACHE_LIFE_TIME:
            shutil.rmtree(path, onerror=on_shutil_error)
            dirs[:] = []


# There is plenty of room for optimization here. The current implementation
# uses a trivial filesystem lock to avoid different server workers (e.g.
# uvicorn/gunicorn) stepping on each other when downloading repo data.
# More sophisticated implementation would use different shared/exclusive locks
# for read/write operations and would narrow the timing of the lock + use a
# layer of in memory cache on top.
# Next level of improvement would be to avoid using FS cache at all and leverage
# redis or other db which would also allow faster locking.
class WithLockAndCleanup:
    """Decorator for async cache functions.

    Serializes access to the cache directory with a filesystem lock and
    removes the directory on any failure, so partially written cache
    content is never served later. The wrapped function must take the
    cache directory as its second positional argument.
    """

    def __init__(self, func):
        functools.update_wrapper(self, func)
        self.func = func

    async def __call__(self, *args, **kwargs):
        cache_dir = args[1]

        # exist_ok avoids the check-then-create race when two workers
        # touch the same cache directory simultaneously.
        os.makedirs(cache_dir, exist_ok=True)

        try:
            async with FSLock(cache_dir):
                return await self.func(*args, **kwargs)
        except BaseException:
            # Cache contents may be partial/corrupt after any failure
            # (including task cancellation) — drop the whole directory,
            # then let the exception propagate.
            shutil.rmtree(cache_dir, onerror=on_shutil_error)
            raise


@WithLockAndCleanup
async def cache_repo(repourl: str, cache_dir: str):
    """Ensure the repo's metadata (and optional resources) are cached.

    Downloads or refreshes repository.json, packages.json and
    resources.zip in cache_dir as needed, validates them, and returns a
    (repo_dict, packages_dict, resources_zip_path_or_None) tuple.
    Raises RuntimeError on download, validation or hash-mismatch errors;
    the WithLockAndCleanup wrapper then discards the cache directory.
    """
    repo_path = os.path.join(cache_dir, 'repository.json')
    packages_path = os.path.join(cache_dir, 'packages.json')
    resources_path = os.path.join(cache_dir, 'resources.zip')

    need_download = True

    # Keep the cached repository.json while it is younger than the
    # freshness window.
    if os.path.exists(repo_path):
        cache_mtime = os.path.getmtime(repo_path)
        current_time = int(time.time())

        if current_time - cache_mtime < PCM_WEB_CACHE_FRESH_TIME:
            need_download = False

    if need_download:
        # repo is not supposed to be too big
        await download_file(repourl, repo_path, PCM_WEB_MAX_REPO_SIZE)

    repo = load_json_file(repo_path)

    try:
        REPO_VALIDATOR.validate(repo)
    except ValidationError as e:
        raise RuntimeError(
            f"Resource does not appear to be a valid PCM repository:\n{e.message}") from None

    # packages.json: keep the cache only when its mtime is later than the
    # repo-advertised update_timestamp plus the freshness margin, and the
    # hash (when advertised) still matches.
    need_download = True
    if os.path.exists(packages_path):
        packages_mtime = repo["packages"]["update_timestamp"]
        cache_mtime = os.path.getmtime(packages_path)

        if cache_mtime > packages_mtime + PCM_WEB_CACHE_FRESH_TIME:
            need_download = False

        # A hash mismatch forces a re-download even when the cache is fresh.
        if "sha256" in repo["packages"]:
            cache_sha = getsha256(packages_path)
            if cache_sha != repo["packages"]["sha256"]:
                need_download = True

    if need_download:
        # packages can be a bit bigger
        await download_file(repo["packages"]["url"], packages_path, PCM_WEB_MAX_PACKAGES_SIZE)

        # Verify the freshly downloaded file against the advertised hash.
        if "sha256" in repo["packages"]:
            cache_sha = getsha256(packages_path)
            if cache_sha != repo["packages"]["sha256"]:
                raise RuntimeError(
                    "Packages hash does not match repository data")

    packages = load_json_file(packages_path)

    try:
        PACKAGES_VALIDATOR.validate(packages)
    except ValidationError as e:
        raise RuntimeError(
            f"Repository packages do not appear to be valid:\n{e.message}") from None

    # resources.zip is optional; same freshness/hash logic as packages.json.
    if "resources" in repo:
        need_download = True

        if os.path.exists(resources_path):
            resources_mtime = repo["resources"]["update_timestamp"]
            cache_mtime = os.path.getmtime(resources_path)

            if cache_mtime > resources_mtime + PCM_WEB_CACHE_FRESH_TIME:
                need_download = False

            if "sha256" in repo["resources"]:
                cache_sha = getsha256(resources_path)
                if cache_sha != repo["resources"]["sha256"]:
                    need_download = True

        if need_download:
            await download_file(repo["resources"]["url"], resources_path, PCM_WEB_MAX_DOWNLOAD_SIZE)

            if "sha256" in repo["resources"]:
                cache_sha = getsha256(resources_path)
                if cache_sha != repo["resources"]["sha256"]:
                    raise RuntimeError(
                        "Resources hash does not match repository data")

    return repo, packages, resources_path if "resources" in repo else None


async def get_packages(repourl: str):
    """Return the packages dict for *repourl*.

    The dict is annotated with the repo metadata under "repo" and, when a
    resources zip exists, with has_icon=1 on every package whose
    "<identifier>/icon.png" entry is present in the zip.
    """
    cache_dir = os.path.join('cache', get_repo_id(repourl))
    repo, packages, resources_path = await cache_repo(repourl, cache_dir)
    packages["repo"] = repo

    if resources_path:
        # Context manager guarantees the zip is closed even if the entry
        # listing raises (the original leaked the handle on error).
        with zipfile.ZipFile(resources_path, "r") as resources:
            # Collect package identifiers that ship an icon.png.
            icons = {
                os.path.dirname(entry.filename)
                for entry in resources.infolist()
                if not entry.is_dir()
                and os.path.basename(entry.filename) == "icon.png"
            }

        # pkg is the dict stored in the list, so mutate it directly.
        for pkg in packages["packages"]:
            if pkg["identifier"] in icons:
                pkg["has_icon"] = 1

    return packages


# There is room for optimization here as well. Instead of getting
# each icon one by one we could return them in batches. This would
# need a bit more javascript to load the blobs into images instead
# of just specifying src attribute.
async def get_icon(repourl: str, pkgid: str):
    """Return (png_bytes, zip_mtime) for the icon of package *pkgid*."""
    cache_dir = os.path.join('cache', get_repo_id(repourl))
    _, _, resources_path = await cache_repo(repourl, cache_dir)

    member_name = f"{pkgid}/icon.png"
    with zipfile.ZipFile(resources_path, "r") as archive:
        icon_bytes = archive.read(archive.getinfo(member_name))
    return icon_bytes, os.path.getmtime(resources_path)
